####beginning of the script####
###Options###
options(scipen = 999)

###Database###
#To download the database, access https://www.cesop.unicamp.br/. 
#Then rename the file to "eseb.sav" and save it in the working directory
#to execute this scrip.

library(haven)
library(sjlabelled)
eseb <- read_sav("eseb.sav")
eseb <- remove_all_labels(eseb)

###Variable Recoding###
library(memisc)
##Dependent Variables##
#1 round
eseb$dep1a <- recode(eseb$Q12P1_B, "Bolsonaro" <- 9,
                     "Others" <- c(1,2,3,4,5,6,7,8,10,12,13,14, 50:99))
eseb$dep1a <- relevel(eseb$dep1, "Others")

#2 Round
eseb$dep2a <- recode(eseb$Q12P2_B, "Bolsonaro" <- 2, "Others" <- c(1,50:99))
eseb$dep2a <- relevel(eseb$dep2, "Others")

##Independente Variables##
#Media Use#
eseb$mjor <- as.factor(ifelse(eseb$P25 == "1", "Yes", "No"))
eseb$mntv <- as.factor(ifelse(eseb$P25 == "2", "Yes", "No"))
eseb$mrad <- as.factor(ifelse(eseb$P25 == "3", "Yes", "No"))
eseb$mfcb <- as.factor(ifelse(eseb$P26 == "1", "Yes", "No"))
eseb$mytb <- as.factor(ifelse(eseb$P26 == "3", "Yes", "No"))
eseb$mwta <- as.factor(ifelse(eseb$P26 == "4", "Yes", "No"))

#Populism#
eseb$pop1 <- recode(eseb$Q401, 1 <- 5, 2 <- 4, 3 <- 3, 4 <- 2, 5 <- 1, NA <- c(7:8))
eseb$pop2 <- recode(eseb$Q402, 1 <- 5, 2 <- 4, 3 <- 3, 4 <- 2, 5 <- 1, NA <- c(7:8))
eseb$pop3 <- recode(eseb$Q403, 1 <- 1, 2 <- 2, 3 <- 3, 4 <- 4, 5 <- 5, NA <- c(7:8))
eseb$pop4 <- recode(eseb$Q404, 1 <- 5, 2 <- 4, 3 <- 3, 4 <- 2, 5 <- 1, NA <- c(7:8))
eseb$pop5 <- recode(eseb$Q405, 1 <- 5, 2 <- 4, 3 <- 3, 4 <- 2, 5 <- 1, NA <- c(7:8))
eseb$pop6 <- recode(eseb$Q406, 1 <- 5, 2 <- 4, 3 <- 3, 4 <- 2, 5 <- 1, NA <- c(7:8))
eseb$pop7 <- recode(eseb$Q407, 1 <- 5, 2 <- 4, 3 <- 3, 4 <- 2, 5 <- 1, NA <- c(7:8))
eseb$pop8 <- recode(eseb$Q502, 1 <- 5, 2 <- 4, 3 <- 3, 4 <- 2, 5 <- 1, NA <- c(7:8))
eseb$pop9 <- recode(eseb$P18, 1 <- 5, 2 <- 4, 3 <- 3, 4 <- 2, 5 <- 1, NA <- c(8:9))
eseb$pop10 <- recode(eseb$Q501, 1 <- 5, 2 <- 4, 3 <- 3, 4 <- 2, 5 <- 1, NA <- c(8:9))

#Sociotropic Economic Evaluation (1 = Worse/5 = Better)
eseb$a.eco <- ifelse(eseb$Q11 > 6, NA, eseb$Q11)
eseb$a.eco <- 6 - eseb$a.eco 

#Government (Office) Evaluation (1 = Worse/5 = Better)
eseb$a.gov <- ifelse(eseb$Q9 > 5, NA, eseb$Q9)
eseb$a.gov <- 6 - eseb$a.gov

#Ideology
eseb$Ideologia <- recode(eseb$Q18, "Non Ideological" <- c(95,97,98), "Left" <- c(0:3), 
                         "Center" <- c(4:6), "Right" <- c(7:10))
eseb$Ideologia <- relevel(eseb$Ideologia, "Center")

#Ideology Dummy
eseb$ddir <- ifelse(eseb$Ideologia == "Right", "Right", "Others")
eseb$desq <- ifelse(eseb$Ideologia == "Left", "Left", "Others")
eseb$ddir <- as.factor(eseb$ddir)
eseb$desq <- as.factor(eseb$desq)
eseb$desq <- relevel(eseb$desq, "Others")

#Antipetismo
eseb$antipt <- ifelse(eseb$Q1501 > 10, NA, eseb$Q1501)
eseb$antipt <- 11 - eseb$antipt

#Class Mobility
eseb$mobnega <- ifelse(eseb$P28A > 6, NA, eseb$P28A)
eseb$mobnegb <- ifelse(eseb$P28B > 6, NA, eseb$P28B)
eseb$mobnegt <- ifelse(eseb$mobnega > eseb$mobnegb, -1, 1)
eseb$mobneg <-  ifelse(eseb$mobnegt == -1, "Risen", "Fallen")
eseb$mobneg <-  as.factor(ifelse(is.na(eseb$mobnegt),"Same", eseb$mobneg))
eseb$mobneg <- relevel(eseb$mobneg, "Same")

#Education
eseb$Escolaridade <- recode(eseb$D3_ESCOLA, "Low" <- c(0:2), "Average" <- c(3:6),
                            "High" <- c(7:9))

#Education Dummy
eseb$escd <- recode(eseb$D3_ESCOLA, "Low/ Average" <- c(0:6), "High" <- c(7:9))

#Bolsa Família
eseb$bf <- factor(ifelse(eseb$P29 == 1, "Yes", "No"))

#Age
eseb$idade <- 2018 - eseb$ANOANIVER

#Sex
eseb$sexo <- as.factor(ifelse(eseb$D2_SEXO == 1, "Male", "Female"))
eseb$sexo <- relevel(eseb$sexo, "Male")

#Religion
eseb$rel <- recode(eseb$D10, "Catholic" <- 3, "Evangelical" <- 5, "Others" <- c(1,2,4, 7:99))
eseb$rel <- relevel(eseb$rel, "Others")

###Imputation of Missing Values###
library(mice)
bdchain <- eseb[c("dep1a", "dep2a", "sexo", "idade", "rel", "bf", "pop1", 
                  "pop2", "pop3", "pop4", "pop5", "pop6", "pop7", "pop8",
                  "pop9", "pop10", "a.eco", "a.gov", "antipt", "Ideologia",
                  "Escolaridade", "escd", "ddir", "desq", "mjor", "mntv", "mrad",
                  "mfcb", "mytb", "mwta", "mobneg")]
bdimp <- mice(bdchain,m=5,maxit=50,meth='pmm',seed=500)
eseba <- eseb
eseb <- complete(bdimp, 1)
rm(bdimp, bdchain)

###Exploratory Factor Analysis###
library(psych)
fa.pop <- eseb[c("pop1", "pop2", "pop3", "pop4", "pop5", "pop6", "pop7", "pop8",
                 "pop9", "pop10")]
fa.parallel(fa.pop, cor = "poly", correct = 0, fa = "fa")
KMO(fa.pop)
#Overall MSA =  0.72
bartlett.test(fa.pop)
#Bartlett's K-squared = 619.43, df = 9, p-value < 0.00000000000000022
fa.pop.t <- fa(fa.pop, nfactors = 2, rotate = "oblimin", fm = "pa", correct = 0, 
               cor = "poly")
print(fa.pop.t$loadings, cutoff = 0.45)
fa.pop.t
rm(fa.pop)

##Anti Politics##
eseb$popa <- eseb$pop2+eseb$pop4+eseb$pop7
eseb$popa <- (eseb$popa - 3)/12

##Anti Pluralism###
eseb$popb <- eseb$pop5+eseb$pop8+eseb$pop10
eseb$popb <- (eseb$popb - 3)/12

#Factor Analysis Results - Grafics#
library(ggplot2)
#Graph2
test <- fa.sort(fa.pop.t)
a <-unclass(test$loadings)
a <- as.data.frame(a)
b <- c("Most politicians don't care about 
        people",
       "Most politicians are concerned only with 
        the rich and powerful",
       "Politicians are Brazil's main 
        problem",
       "Most politicians are reliable",
       "Commitment in politics means to 
        bargain with principles",
       "The people, not the politicians, 
        should make the most important decisions",
       "The will of the majority should prevail, 
        even if it harms minorities",
       "Having a strong leader in government 
        is good, even if he doesn’t follow 
        the rules",
       "Minorities should adapt to Brazilian 
        customs and traditions",
       "When the Supreme Court interferes with 
        the government, the President or Congress 
        can ignore the Court")
b <- as.data.frame(b)
c <- as.data.frame(a$PA1)
colnames(c) <- "val"
c$fator <- "Anti-Politics"
ab <- cbind(c,b)
ab$b <- factor(ab$b, levels = ab$b)
c <- as.data.frame(a$PA2)
colnames(c) <- "val"
c$fator <- "Anti-Pluralism"
ac <- cbind(c, b)
ac$b <- factor(ac$b, levels = ac$b)
plotfa <- rbind(ab, ac)
rm(a, b, c, ab, ac, test)
plot1 <- ggplot(plotfa, aes(factor(b),val, fill=val)) + 
  facet_wrap(~ fator, nrow=1) + 
  geom_bar(stat="identity") + 
  coord_flip() + 
  scale_fill_gradient2(name = "Loading", 
                       high = "blue", mid = "white", low = "red", 
                       midpoint = 0, guide = "none") +
  labs(title = "Exploratory factor analysis results",
       y = "Loading Strength", 
       x = "")+
  theme_bw(base_size=10)
rm(plotfa)

###Descriptive Statistics###
library(table1)
table1(~antipt+ popa+ popb+ idade, data = eseb)
table1(~dep1a+dep2a+sexo+rel+bf+a.eco+a.gov+Ideologia+Escolaridade+
         mjor + mntv +  mrad + mfcb + mytb + mwta + mobneg, data = eseb)

###Models###
#Reescaling Variables#
eseb$dep1 <- ifelse(eseb$dep1a != "Bolsonaro", 0, 1)
eseb$dep1 <- as.factor(eseb$dep1)
eseb$dep2 <- ifelse(eseb$dep2a != "Bolsonaro", 0, 1)
eseb$dep2 <- as.factor(eseb$dep2)
eseb$a.eco <- (eseb$a.eco - min(eseb$a.eco))/(max(eseb$a.eco)-min(eseb$a.eco))
eseb$a.gov <- (eseb$a.gov - min(eseb$a.gov))/(max(eseb$a.gov)-min(eseb$a.gov))
eseb$antipt <- (eseb$antipt - min(eseb$antipt))/(max(eseb$antipt)-min(eseb$antipt))
eseb$popa <- (eseb$popa - min(eseb$popa))/(max(eseb$popa)-min(eseb$popa))
eseb$popb <- (eseb$popb - min(eseb$popb))/(max(eseb$popb)-min(eseb$popb))

#First Round#
turno1 <- glm(dep1 ~ sexo + idade + rel + bf +popa + popb + 
                a.eco + a.gov + antipt + Ideologia + Escolaridade + 
                ddir*escd + desq*escd+
                mjor + mntv +  mrad + mfcb + mytb + mwta + mobneg, data = eseb,
              family = binomial(link = "logit"))
#Second Round#
turno2 <- glm(dep2 ~ sexo + idade + rel + bf +popa + popb + 
                a.eco + a.gov + antipt + Ideologia + Escolaridade + 
                ddir*escd + desq*escd+
                mjor + mntv +  mrad + mfcb + mytb + mwta + mobneg, data = eseb,
              family = binomial(link = "logit"))

###Sensitivity Analysis###
library(InformationValue)
set.seed(1)
sample <- sample(c(TRUE, FALSE), nrow(eseb), replace=TRUE, prob=c(0.7,0.3))
treino <- eseb[sample, ]
teste <- eseb[!sample, ] 
rm(sample)

turno1v <- glm(dep1 ~ sexo + idade + rel + bf +popa + popb + 
                 a.eco + a.gov + antipt + Ideologia + Escolaridade + 
                 ddir*escd + desq*escd+
                 mjor + mntv +  mrad + mfcb + mytb + mwta + mobneg, data =  treino,
               family = binomial(link = "logit"))

previsto1 <- predict.glm(turno1v, teste, type="response")
sensitivity(teste$dep1, previsto1)
specificity(teste$dep1, previsto1)
misClassError(teste$dep1, previsto1, threshold=0.5)

turno2v <- glm(dep2 ~ sexo + idade + rel + bf +popa + popb + 
                 a.eco + a.gov + antipt + Ideologia + Escolaridade + 
                 ddir*escd + desq*escd+
                 mjor + mntv +  mrad + mfcb + mytb + mwta + mobneg, data =  treino,
               family = binomial(link = "logit"))

previsto2 <- predict.glm(turno2v, teste, type="response")
sensitivity(teste$dep2, previsto2)
specificity(teste$dep2, previsto2)
misClassError(teste$dep2, previsto2, threshold=0.5)
rm(treino, teste, turno1v, turno2v, previsto1, previsto2)
#Plot Model 1#
library(sjPlot)
plotm1 <- plot_model(turno1, wrap.labels = 45,
                     title = "Voting for Bolsonaro (1º Round)*",
                     terms = c("sexoFemale", "relEvangelical", 
                               "popb", "antipt", "EscolaridadeHigh", 
                               "mfcbYes", "mwtaYes", "ddirRight:escdHigh"),
                     vline.color = "grey", 
                     sort.est = F, 
                     axis.labels = c("Ideology [Right] * Education [High]",
                                     "WhatsApp as a source of information [Yes]",
                                     "Facebook as a source of information [Yes]",
                                     "Education [High]",
                                     "Antipetismo",
                                     "Anti-pluralism Discourse", 
                                     "Religion [Evangelical]",
                                     "Sex [Female]"),
                     dot.size = 1,
                     line.size = 0.5,
                     show.values = T, value.offset = 0.3, value.size = 3) 

plotm1 <- plotm1 + theme_bw(base_size = 10)+
                   labs(subtitle = "Logit Model",
                        caption = "*Only significant variables were plotted")  

#Plot Model 2#
plotm2 <- plot_model(turno2, wrap.labels = 45,
                     title = "Voting for Bolsonaro (2º Round)*",
                     terms = c("relCatholic", 
                               "relEvangelical", 
                               "bfYes",
                               "antipt", 
                               "popb", 
                               "IdeologiaRight",
                               "mfcbYes", 
                               "mwtaYes",
                               "mytbYes",
                               "ddirRight:escdHigh"),
                     vline.color = "grey", 
                     sort.est = F,
                     axis.labels = c("Education [High]*Ideology [Right]",
                                     "WhatsApp as a source of information [Yes]",
                                     "YouTube as a source of information [Yes]",
                                     "Facebook as a source of information [Yes]",
                                     "Ideology [Right]",
                                     "Antipetismo",
                                     "Anti-pluralism Discourse",
                                     "Bolsa Família Beneficiary [Yes]",
                                     "Religion [Evangelical]",
                                     "Religion [Catholic]"),
                     dot.size = 1,
                     line.size = 0.5,
                     show.values = T, value.offset = 0.3, value.size = 3, case = NULL) 
plotm2 <- plotm2 + theme_bw(base_size = 10)+
                   labs(subtitle = "Logit Model",
                        caption = "*Only significant variables were plotted")

#Predicted Probabilities by media use
#Turno 1#
library(ggeffects)
ploeff1a <- ggpredict(turno1, terms = "mwta", ci.lvl = 0.95)
ploeff1b <- ggpredict(turno1, terms = "mfcb", ci.lvl = 0.95)
ploeff1c <- ggpredict(turno1, terms = "mytb", ci.lvl = 0.95)
ploeff1a$group <- "WhatsApp"
ploeff1b$group <- "Facebook"
ploeff1c$group <- "YouTube"
ploeff1 <- rbind(ploeff1a, ploeff1b)
ploeff1 <- rbind(ploeff1, ploeff1c)
ploeff1$round <- "1º Round"
ploeff2a <- ggpredict(turno2, terms = "mwta", ci.lvl = 0.95)
ploeff2b <- ggpredict(turno2, terms = "mfcb", ci.lvl = 0.95)
ploeff2c <- ggpredict(turno2, terms = "mytb", ci.lvl = 0.95)
ploeff2a$group <- "WhatsApp"
ploeff2b$group <- "Facebook"
ploeff2c$group <- "YouTube"
ploeff2 <- rbind(ploeff2a, ploeff2b)
ploeff2 <- rbind(ploeff2, ploeff2c)
ploeff2$round <- "2º Round"
ploeff <- rbind(ploeff1, ploeff2)

ploteff <- ggplot(ploeff, aes(x, predicted, color = x))
ploteff <- ploteff + geom_point(size = 2)+
  geom_linerange(aes(ymin = conf.low, ymax = conf.high), size = 1)+
  facet_grid(round~factor(group, levels = c("Facebook", "WhatsApp", "YouTube")))+
  theme_bw(base_size=10)+
  theme(legend.position = "none")+
  labs(title = "Predicted Probabilities for Media Use and Voting for Bolsonaro",
       x = "",
       y = "")+
  scale_y_continuous(limits = c(0, 0.5), breaks = seq(0, 0.5, by = 0.1),
                     labels = scales::percent)



rm(ploeff1, ploeff1a, ploeff1b, ploeff1c, ploeff2, ploeff2a, ploeff2b, ploeff2c)

###Ploting Results###
library(gridExtra)
grid.arrange(plot1, plotm1, plotm2, ploteff,
             ncol = 2, nrow = 2)
#Export the plot with width = 1150 and height = 1000

###Methodological Appendix###
#Table A
###Table A - Online Appendix###
write.csv2(fa.pop.t$loadings, "fatorial.csv")
#Table B
table1(~dep1a+dep2a+sexo+idade+Escolaridade+rel+bf+mobneg+Ideologia+a.eco+
         a.gov+antipt+popb+popa + mjor + mntv +  mrad + mfcb + mytb + mwta, data = eseb)

#Table C
#First Round#
eseba$dep1 <- ifelse(eseba$dep1a != "Bolsonaro", 0, 1)
eseba$dep2 <- ifelse(eseba$dep2a != "Bolsonaro", 0, 1)
eseba$popa <- eseba$pop2+eseba$pop4+eseba$pop7
eseba$popa <- (eseba$popa - 3)/12
eseba$popb <- eseba$pop5+eseba$pop8+eseba$pop10
eseba$popb <- (eseba$popb - 3)/12


turno1a <- glm(dep1 ~ sexo + idade + rel + bf +popa + popb + 
                a.eco + a.gov + antipt + Ideologia + Escolaridade + 
                ddir*escd + desq*escd+
                mjor + mntv +  mrad + mfcb + mytb + mwta + mobneg, data = eseba,
              family = binomial(link = "logit"))
#Second Round#
turno2a <- glm(dep2 ~ sexo + idade + rel + bf +popa + popb + 
                a.eco + a.gov + antipt + Ideologia + Escolaridade + 
                ddir*escd + desq*escd+
                mjor + mntv +  mrad + mfcb + mytb + mwta + mobneg, data = eseba,
              family = binomial(link = "logit"))

tab_model(turno1, turno1a, turno2, turno2a,show.ci = F, collapse.se = T, p.style = "stars", 
          wrap.labels = 40)

#Table D
set.seed(1)
sample <- sample(c(TRUE, FALSE), nrow(eseb), replace=TRUE, prob=c(0.7,0.3))
treino <- eseb[sample, ]
teste <- eseb[!sample, ] 

turno1v <- glm(dep1 ~ sexo + idade + rel + bf +popa + popb + 
                 a.eco + a.gov + antipt + Ideologia + Escolaridade + 
                 ddir*escd + desq*escd+
                 mjor + mntv +  mrad + mfcb + mytb + mwta + mobneg, data =  treino,
                 family = binomial(link = "logit"))

previsto1 <- predict.glm(turno1v, teste, type="response")
confusionMatrix(teste$dep1, previsto1)
prop.table(confusionMatrix(teste$dep1, previsto1))
sensitivity(teste$dep1, previsto1)
specificity(teste$dep1, previsto1)
misClassError(teste$dep1, previsto1, threshold=0.5)

#Table E
turno2v <- glm(dep2 ~ sexo + idade + rel + bf +popa + popb + 
                 a.eco + a.gov + antipt + Ideologia + Escolaridade + 
                 ddir*escd + desq*escd+
                 mjor + mntv +  mrad + mfcb + mytb + mwta + mobneg, data =  treino,
                 family = binomial(link = "logit"))

previsto2 <- predict.glm(turno2v, teste, type="response")
confusionMatrix(teste$dep2, previsto2)
prop.table(confusionMatrix(teste$dep2, previsto2))
sensitivity(teste$dep2, previsto2)
specificity(teste$dep2, previsto2)
misClassError(teste$dep2, previsto2, threshold=0.5)

####End of Script####